import pandas as pd
import re

# Read the raw training file, pull out the search-query fields of every line,
# and write them to a CSV with one query per row.
with open('data/SogouMatchData/user_tag_query.10W.TRAIN', 'r', encoding="gbk", errors="ignore") as file:
    data = file.read().split("\n")

# Fields are separated by runs of spaces and/or tabs; compile the pattern once
# instead of looking it up on every iteration.
field_separator = re.compile(r'[ \t]+')

records = []
for index, line in enumerate(data):
    parts = field_separator.split(line)
    # Lines with fewer than five fields carry no query and are skipped.
    # Everything from the fifth field onward is collected as a query
    # (presumably the first four fields are an ID plus tags -- confirm
    # against the dataset description).
    if len(parts) >= 5:
        records.extend(parts[4:])
        print(f"line{index}")

divided_data = pd.DataFrame(records, columns=["SearchInfo"])
# rename_axis returns a new DataFrame rather than modifying in place, so the
# result must be assigned back; otherwise the index column in the CSV is
# written with an empty header instead of "Index".
divided_data = divided_data.rename_axis("Index")
divided_data.to_csv("data/SogouMatchData/HandledData/searchInfo.csv")
